import sys
sys.setrecursionlimit(10000)
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
import os
os.environ['GNUMPY_IMPLICIT_CONVERSION'] = 'ignore'
print os.environ.get('GNUMPY_IMPLICIT_CONVERSION')
import cPickle
import gzip
from breze.learn.data import one_hot
from breze.learn.base import cast_array_to_local_type
from breze.learn.utils import tile_raster_images
import climin.stops
import climin.initialize
from climin import mathadapt as ma
from breze.learn import hvi
from breze.learn.hvi import HmcViModel
from breze.learn.hvi.energies import (NormalGaussKinEnergyMixin, DiagGaussKinEnergyMixin)
from breze.learn.hvi.inversemodels import MlpGaussInvModelMixin
from matplotlib import pyplot as plt
from matplotlib import cm
import numpy as np
#import fasttsne
from IPython.html import widgets
%matplotlib inline
import theano
theano.config.compute_test_value = 'ignore'  # switch to 'raise' when debugging shapes
from theano import (tensor as T, clone)
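# Hamiltonian variational inference (HVI) on binarized MNIST: a VAE whose
# approximate posterior is refined by a few HMC steps, using the HmcViModel
# and mixins from breze.learn.hvi imported above.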
datafile = '../mnist.pkl.gz'
# Load data.
with gzip.open(datafile, 'rb') as f:
    train_set, val_set, test_set = cPickle.load(f)
X, Z = train_set
VX, VZ = val_set
TX, TZ = test_set
Z = one_hot(Z, 10)
VZ = one_hot(VZ, 10)
TZ = one_hot(TZ, 10)
X_no_bin = X
VX_no_bin = VX
TX_no_bin = TX
# Binarize the MNIST data.
np.random.seed(0)
X = np.random.binomial(1, X) * 1.0
VX = np.random.binomial(1, VX) * 1.0
TX = np.random.binomial(1, TX) * 1.0
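# Each grey value in [0, 1] is treated as an independent Bernoulli
# probability, so np.random.binomial(1, X) draws one binary sample per pixel.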
image_dims = 28, 28
X_np, Z_np, VX_np, VZ_np, TX_np, TZ_np, X_no_bin_np, VX_no_bin_np, TX_no_bin_np = X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin
X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin = [
    cast_array_to_local_type(i)
    for i in (X, Z, VX, VZ, TX, TZ, X_no_bin, VX_no_bin, TX_no_bin)]
print X.shape
fig, ax = plt.subplots(figsize=(9, 9))
img = tile_raster_images(X_np[:64], image_dims, (8, 8), (1, 1))
ax.imshow(img, cmap=cm.binary)
fast_dropout = False
if fast_dropout:
    class MyHmcViModel(HmcViModel,
                       hvi.FastDropoutMlpBernoulliVisibleVAEMixin,
                       hvi.FastDropoutMlpGaussLatentVAEMixin,
                       DiagGaussKinEnergyMixin,
                       MlpGaussInvModelMixin):
        pass
    kwargs = {
        'p_dropout_inpt': .1,
        'p_dropout_hiddens': [.2, .2],
    }
else:
    class MyHmcViModel(HmcViModel,
                       hvi.MlpBernoulliVisibleVAEMixin,
                       hvi.MlpGaussLatentVAEMixin,
                       DiagGaussKinEnergyMixin,
                       MlpGaussInvModelMixin):
        pass
    kwargs = {}
batch_size = 500
#optimizer = 'rmsprop', {'step_rate': 1e-4, 'momentum': 0.95, 'decay': .95, 'offset': 1e-6}
#optimizer = 'adam', {'step_rate': .5, 'momentum': 0.9, 'decay': .95, 'offset': 1e-6}
optimizer = 'adam', {'step_rate': 0.0005}
# n_latents is the number of latent random variables, NOT the size of their
# sufficient statistics (a diagonal Gaussian posterior needs a mean and a
# variance per latent, i.e. twice as many numbers).
n_latents = 2
n_hidden = 200
m = MyHmcViModel(X.shape[1], n_latents,
                 [n_hidden, n_hidden], ['rectifier'] * 2,
                 [n_hidden, n_hidden], ['rectifier'] * 2,
                 [n_hidden], ['rectifier'] * 1,
                 n_hmc_steps=3, n_lf_steps=4,
                 n_z_samples=1,
                 optimizer=optimizer, batch_size=batch_size,
                 allow_partial_velocity_update=False,
                 perform_acceptance_step=False,
                 **kwargs)
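# The (layer sizes, transfer functions) pairs configure the model's MLPs;
# judging by the filename below ('gen2_recog2_aux1'), presumably two hidden
# layers each for the recognition and generative nets plus one for the
# auxiliary inverse (velocity) model. n_hmc_steps and n_lf_steps set the
# number of HMC steps and of leapfrog steps per HMC step.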
#climin.initialize.randomize_normal(m.parameters.data, 0.1, 1e-1)
#m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.2)
#m.parameters.__setitem__(m.kin_energy.mlp.layers[-1].bias, 1)
old_best_params = None
#print m.score(TX)
print m.parameters.data.shape
FILENAME = 'hvi_gen2_recog2_aux1_late2_hid200_fullbin_untrained_new.pkl'
#old_best_params = None
with open(FILENAME, 'rb') as f:
    np_array = cPickle.load(f)
old_best_params = cast_array_to_local_type(np_array)
print old_best_params.shape
m.parameters.data = old_best_params.copy()
#old_best_loss = m.score(VX)
print m.score(VX)
print m.score(TX)
print m.parameters.view(m.init_recog.mlp.layers[2].bias)
# Manually override the HMC step size, a recognition-net bias and the
# kinetic-energy variance parameter (apparently in log space, given the
# negative values) before fine-tuning.
m.parameters.__setitem__(m.hmc_sampler.step_size_param, 0.2)
m.parameters.__setitem__(m.init_recog.mlp.layers[2].bias, cast_array_to_local_type(
    np.array([-0.34611687, -0.09502647, -1.77520561, -2.25207138])))
m.parameters.__setitem__(m.kin_energy.variance_parameter, cast_array_to_local_type(
    np.array([-0.7, -0.7])))
# Presumably the effective step size the sampler derives from the raw
# step size parameter.
print 0.1 * m.parameters.view(m.hmc_sampler.step_size_param) ** 2 + 1e-8
#print m.estimate_nll(TX, 500)
print m.score(VX_no_bin)
print m.score(TX_no_bin)
TARGET_FILENAME = 'hvi_gen2_recog2_aux1_late2_hid200_fullbin_3hmc_04lf_pretrained'
FILETYPE_EXTENSION = '.pkl'
old_best_params = None
max_passes = 500
max_iter = max_passes * X.shape[0] / batch_size
n_report = X.shape[0] / batch_size
stop = climin.stops.AfterNIterations(max_iter)
pause = climin.stops.ModuloNIterations(n_report)
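# Stop after max_iter parameter updates; pause once per pass over the data
# to report the validation loss.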
# theano.config.optimizer = 'fast_compile'
for i, info in enumerate(m.powerfit((X_no_bin,), (VX,), stop, pause, eval_train_loss=False)):
    print i, info['loss'], info['val_loss']
    if i == 0 and old_best_params is not None:
        # Note: old_best_loss is only defined if the commented-out scoring
        # above is run; with old_best_params left as None this branch is
        # never entered.
        if info['best_loss'] > old_best_loss:
            info['best_loss'] = old_best_loss
            info['best_pars'] = old_best_params
    if info['best_loss'] == info['val_loss']:
        # Checkpoint whenever the validation loss hits a new best.
        f = open(TARGET_FILENAME + FILETYPE_EXTENSION, 'wb')
        cPickle.dump(m.parameters.data, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
#train_result_params = m.parameters.data.copy()
m.parameters.data = info['best_pars']
m.score(VX)
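# Compile Theano helper functions so the sampler's internals (recognition
# samples, HMC/leapfrog steps, energies) can be inspected with numpy inputs.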
f_z_init_sample = m.function(['inpt'], m.init_recog.sample(), numpy_result=True)
f_z_sample = m.function(['inpt'], m.hmc_sampler.output, numpy_result=True)
f_gen = m.function([m.gen.inpt], m.gen.sample(), numpy_result=True)
f_gen_rate = m.function([m.gen.inpt], m.gen.rate, numpy_result=True)
f_joint_nll = m.function(['inpt'], m.joint_nll, numpy_result=True)
curr_pos = T.matrix('current_position')
curr_vel = T.matrix('current_velocity')
norm_noise = T.matrix('normal_noise')
unif_noise = T.vector('uniform_noise')
new_sampled_vel = m.hmc_sampler.kin_energy.sample(norm_noise)
updated_vel = m.hmc_sampler.partial_vel_constant * curr_vel + m.hmc_sampler.partial_vel_complement * new_sampled_vel
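# Partial momentum refreshment: presumably the new velocity is
# c * v + sqrt(1 - c**2) * eps, with eps drawn from the kinetic-energy
# distribution, so the velocity's marginal distribution is preserved.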
performed_hmc_steps = m.hmc_sampler.perform_hmc_steps(curr_pos, curr_vel)
hmc_step = m.hmc_sampler.hmc_step(curr_pos, curr_vel, np.float32(0), norm_noise, unif_noise)
lf_step_results = m.hmc_sampler.simulate_dynamics(curr_pos, curr_vel, return_full_list=True)
f_pot_en = m.function(['inpt', curr_pos], m.hmc_sampler.eval_pot_energy(curr_pos), numpy_result=True)
f_kin_en = m.function(['inpt', curr_vel], m.kin_energy.nll(curr_vel).sum(-1), numpy_result=True)
f_perform_hmc_steps = m.function(['inpt', curr_pos, curr_vel],
T.concatenate([performed_hmc_steps[0], performed_hmc_steps[1]], axis=1))
f_hmc_step = m.function(['inpt', curr_pos, curr_vel, norm_noise, unif_noise],
T.concatenate([hmc_step[0], hmc_step[1]],axis=1), on_unused_input='warn')
f_kin_energy_sample_from_noise = m.function(['inpt', norm_noise], new_sampled_vel)
f_updated_vel_from_noise = m.function(['inpt', curr_vel, norm_noise], updated_vel)
f_perform_lf_steps = m.function(['inpt', curr_pos, curr_vel],
T.concatenate([lf_step_results[0], lf_step_results[1]], axis=0))
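# Aside: a minimal numpy sketch of what a single leapfrog simulation computes,
# here for a hypothetical standard-normal target (so grad U(z) = z). This is
# only an illustration of the integrator compiled above, not breze's
# implementation; all toy_* names are made up for this example.
def toy_grad_pot(pos):
    return pos  # gradient of U(z) = 0.5 * z ** 2

def toy_leapfrog(pos, vel, step_size, n_lf_steps):
    vel = vel - 0.5 * step_size * toy_grad_pot(pos)  # initial velocity half step
    for _ in range(n_lf_steps - 1):
        pos = pos + step_size * vel                  # full position step
        vel = vel - step_size * toy_grad_pot(pos)    # full velocity step
    pos = pos + step_size * vel
    vel = vel - 0.5 * step_size * toy_grad_pot(pos)  # final velocity half step
    return pos, vel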
f_z_init_mean = m.function(['inpt'], m.init_recog.mean, numpy_result=True)
f_z_init_var = m.function(['inpt'], m.init_recog.var, numpy_result=True)
f_v_init_var = m.function(['inpt'], T.extra_ops.cpu_contiguous(m.kin_energy.var), numpy_result=True)
full_sample = m.hmc_sampler.sample_with_path()
f_full_sample = m.function(['inpt'], T.concatenate([full_sample[0], full_sample[1]], axis=1))
final_pos = T.matrix('final_pos')
final_vel = T.matrix('final_vel')
inpt_replacements = {m.final_vel_model_inpt['position']: final_pos,
m.final_vel_model_inpt['time']: T.cast(m.hmc_sampler.n_hmc_steps, dtype='float32')}
final_vel_model_var = clone(m.final_vel_model.var, replace=inpt_replacements)
final_vel_model_mean = clone(m.final_vel_model.mean, replace=inpt_replacements)
final_vel_model_nll = clone(m.final_vel_model.nll(final_vel).sum(-1), replace=inpt_replacements)
f_v_final_var = m.function(['inpt', final_pos], final_vel_model_var, numpy_result=True)
f_v_final_mean = m.function(['inpt', final_pos], final_vel_model_mean, numpy_result=True)
f_v_final_model_nll = m.function(['inpt', final_pos, final_vel], final_vel_model_nll, numpy_result=True)
f_kin_energy_nll = m.function(['inpt'], m.kin_energy.expected_nll, numpy_result=True)
f_init_recog_nll = m.function(['inpt'], m.init_recog.expected_nll.sum(-1), numpy_result=True)
print f_init_recog_nll(VX).mean()
init_var = f_z_init_var(VX)
print init_var.mean()
print init_var.max()
print init_var.min()
fig, axs = plt.subplots(2, 3, figsize=(27, 18))
### Original data
O = (X_no_bin_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O, image_dims, (8, 8), (1, 1))
axs[0, 0].imshow(img, cmap=cm.binary)
O2 = (X_np[:64])[:, :784].astype('float32')
img = tile_raster_images(O2, image_dims, (8, 8), (1, 1))
axs[1, 0].imshow(img, cmap=cm.binary)
### Reconstruction
#z_sample = f_z_sample((X[:64]))
z_init_sample = cast_array_to_local_type(f_z_init_sample(X[:64]))
# Take the final positions (the first 64 columns of the concatenated
# [positions, velocities] output) after all HMC steps.
z_sample = f_perform_hmc_steps(
    X[:64], z_init_sample,
    f_kin_energy_sample_from_noise(
        X[:64],
        cast_array_to_local_type(
            np.random.normal(size=(64, m.n_latent)).astype('float32'))))[-1, :64, :]
R = f_gen_rate(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R, image_dims, (8, 8), (1, 1))
axs[0, 1].imshow(img, cmap=cm.binary)
Rinit = f_gen_rate(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit, image_dims, (8, 8), (1, 1))
axs[0, 2].imshow(img, cmap=cm.binary)
R2 = f_gen(z_sample)[:, :784].astype('float32')
img = tile_raster_images(R2, image_dims, (8, 8), (1, 1))
axs[1, 1].imshow(img, cmap=cm.binary)
Rinit2 = f_gen(z_init_sample)[:, :784].astype('float32')
img = tile_raster_images(Rinit2, image_dims, (8, 8), (1, 1))
axs[1, 2].imshow(img, cmap=cm.binary)
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
prior_sample = cast_array_to_local_type(np.random.randn(64, m.n_latent))
S = f_gen_rate(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
axs[0].imshow(img, cmap=cm.binary)
S2 = f_gen(prior_sample)[:, :784].astype('float32')
img = tile_raster_images(S2, image_dims, (8, 8), (1, 1))
axs[1].imshow(img, cmap=cm.binary)
#S3 = f_gen_rate(prior_sample)[:, :784].astype('float32')
#img = tile_raster_images(S, image_dims, (8, 8), (1, 1))
#axs[2, 2].imshow(img, cmap=cm.nipy_spectral)
# TODO: Axis titles, plot title, make this work if one selects two dimensions out of more than two (i.e. if n_latent>2)
from scipy.stats import norm as normal_distribution
unit_interval_positions = np.linspace(0.025, 0.975, 20)
positions = normal_distribution.ppf(unit_interval_positions)
print unit_interval_positions
print positions
latent_array = np.zeros((400, 2))
latent_array[:, 1] = -np.repeat(positions, 20) # because images are filled top -> bottom, left -> right (row by row)
latent_array[:, 0] = np.tile(positions, 20)
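# latent_array now scans the prior on a 20 x 20 grid of equal-probability
# quantiles; the row order matches tile_raster_images' top-to-bottom,
# left-to-right fill.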
fig, axs = plt.subplots(1, 1, figsize=(24, 24))
F = f_gen_rate(cast_array_to_local_type(latent_array)).astype('float32')
img = tile_raster_images(F, image_dims, (20, 20), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs.imshow(img, cmap=cm.binary)
L = f_z_sample(X)
L_init = f_z_init_sample(X)
dim1 = 0
dim2 = 1
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(L[:, dim1], L[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)
axs[1].scatter(L_init[:, dim1], L_init[:, dim2], c=Z[:].argmax(1), lw=0, s=5, alpha=.2)
cax = fig.add_axes([0.95, 0.2, 0.02, 0.6])
cax.scatter(np.repeat(0, 10), np.arange(10), c=np.arange(10), lw=0, s=300)
cax.set_xlim(-0.1, 0.1)
cax.set_ylim(-0.5, 9.5)
plt.yticks(np.arange(10))
plt.tick_params(axis='x', which='both', bottom='off', top='off', labelbottom='off')
cax.tick_params(axis='y', colors='white')
for tick in cax.yaxis.get_major_ticks():
    tick.label.set_fontsize(14)
    tick.label.set_color('black')
cax.spines['bottom'].set_color('white')
cax.spines['top'].set_color('white')
cax.spines['right'].set_color('white')
cax.spines['left'].set_color('white')
axs[0].set_title('After HMC steps')
axs[1].set_title('Initial recognition model')
axs[0].set_xlim(-3, 3)
axs[0].set_ylim(-3, 3)
axs[1].set_xlim(-3, 3)
axs[1].set_ylim(-3, 3)
fig, axs = plt.subplots(4, 5, figsize=(20, 16))
colors = cm.jet(np.linspace(0, 1, 10))
for i in range(5):
    axs[0, i].scatter(L_init[Z[:].argmax(1) == i, dim1], L_init[Z[:].argmax(1) == i, dim2],
                      c=colors[i], lw=0, s=5, alpha=.2)
    axs[1, i].scatter(L[Z[:].argmax(1) == i, dim1], L[Z[:].argmax(1) == i, dim2],
                      c=colors[i], lw=0, s=5, alpha=.2)
    axs[0, i].set_title(str(i) + ' before HMC')
    axs[1, i].set_title(str(i) + ' after HMC')
    axs[2, i].scatter(L_init[Z[:].argmax(1) == (5 + i), dim1], L_init[Z[:].argmax(1) == (5 + i), dim2],
                      c=colors[5 + i], lw=0, s=5, alpha=.2)
    axs[3, i].scatter(L[Z[:].argmax(1) == (5 + i), dim1], L[Z[:].argmax(1) == (5 + i), dim2],
                      c=colors[5 + i], lw=0, s=5, alpha=.2)
    axs[2, i].set_title(str(5 + i) + ' before HMC')
    axs[3, i].set_title(str(5 + i) + ' after HMC')
    for j in range(4):
        axs[j, i].set_xlim(-3, 3)
        axs[j, i].set_ylim(-3, 3)
X_index = 0 # index=0 -> 5, index=1 -> 0, index=2 -> 4, index=3 -> 1, index=24 -> underlined 1, index=39 -> ugly 6
num_repeats = 1000
fig, axs = plt.subplots(1, 2, figsize=(6, 3))
img = tile_raster_images(np.array([X[X_index, :]]), image_dims, (1, 1), (1, 1))
axs[0].imshow(img, cmap=cm.binary)
img = tile_raster_images(np.array([X_no_bin[X_index, :]]), image_dims, (1, 1), (1, 1))
axs[1].imshow(img, cmap=cm.binary)
repeated_X = cast_array_to_local_type(np.tile(np.array([X[X_index, :]]), (num_repeats, 1)).astype('float32'))
full_sample = f_full_sample(repeated_X).astype('float32')
# Positions and velocities come back concatenated along axis 1; split them.
z_samples = full_sample[:, :num_repeats, :]
v_samples = full_sample[:, num_repeats:, :]
z_sample_final_mean = z_samples[m.n_hmc_steps, :, :].mean(axis=0)
z_sample_final_std = z_samples[m.n_hmc_steps, :, :].std(axis=0)
single_X = cast_array_to_local_type(np.array([X[X_index, :]]).astype('float32'))
init_mean = f_z_init_mean(single_X)[0]
init_var = f_z_init_var(single_X)[0]
init_vel_var = f_v_init_var(single_X)[0]
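# Compare the one-shot recognition model's Gaussian posterior with the
# empirical moments of the samples after the full HMC chain.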
print 'Posterior distribution statistics'
print
print 'Initial model: - Mean: ' + str(init_mean)
print ' - Var: ' + str(init_var)
print
print 'Full HVI model: - Mean: ' + str(z_sample_final_mean)
print ' - Var: ' + str(z_sample_final_std ** 2)
print
print 'Velocity model variance: ' + str(init_vel_var)
dim1 = 0
dim2 = 1
resolution = 201
lower_dim1_limit = z_sample_final_mean[dim1] - 0.2
upper_dim1_limit = z_sample_final_mean[dim1] + 0.2
lower_dim2_limit = z_sample_final_mean[dim2] - 0.2
upper_dim2_limit = z_sample_final_mean[dim2] + 0.2
number_images_per_axis = 11
latent_array = np.zeros((number_images_per_axis**2, 2))
gap_between_images = (resolution - 1)//(number_images_per_axis - 1)
indices_for_images = np.arange(0, resolution, gap_between_images)
pot_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
    for j in range(resolution):
        #pos_array = f_z_init_mean(single_X)
        pos_array = np.array([z_sample_final_mean])
        pos_array[0, dim1] = x[i]
        pos_array[0, dim2] = y[j]
        pos_array_of_local_type = cast_array_to_local_type(pos_array)
        pot_energy_matrix[j, i] = f_pot_en(single_X, pos_array_of_local_type)[0]
        if i in indices_for_images and j in indices_for_images:
            # Store the latent positions that lie on the coarse image grid.
            # Images fill row by row from the top, hence the flipped j index.
            latent_array[(i // gap_between_images)
                         + (number_images_per_axis - 1 - j // gap_between_images)
                         * number_images_per_axis, :] = pos_array[0, :]
print 'Minimum potential energy (at grid points): ' + str(pot_energy_matrix.min())
print 'Maximum potential energy (at grid points): ' + str(pot_energy_matrix.max())
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
CS = axs[0].contour(x, y, pot_energy_matrix, 20)
plt.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
axs[0].set_title('Potential energy surface')
F = f_gen_rate(cast_array_to_local_type(latent_array))
img = tile_raster_images(F, image_dims, (number_images_per_axis, number_images_per_axis), (1, 1))
#axs.imshow(img, cmap=cm.nipy_spectral)
axs[1].imshow(img, cmap=cm.binary)
plt.show()
resolution = 200
underlying_variance = f_v_init_var(single_X)
velocity_range_for_images = 10.0 * np.sqrt(underlying_variance[0, :])
lower_dim1_limit = np.around(- velocity_range_for_images[dim1])
upper_dim1_limit = np.around( velocity_range_for_images[dim1])
lower_dim2_limit = np.around(- velocity_range_for_images[dim2])
upper_dim2_limit = np.around( velocity_range_for_images[dim2])
kin_energy_matrix = np.zeros((resolution, resolution), dtype='float32')
kin_x = np.linspace(lower_dim1_limit, upper_dim1_limit, resolution)
kin_y = np.linspace(lower_dim2_limit, upper_dim2_limit, resolution)
for i in range(resolution):
    for j in range(resolution):
        vel_array = np.zeros((1, m.n_latent)).astype('float32')
        vel_array[0, dim1] = kin_x[i]
        vel_array[0, dim2] = kin_y[j]
        vel_array_of_local_type = cast_array_to_local_type(vel_array)
        kin_energy_matrix[j, i] = f_kin_en(single_X, vel_array_of_local_type)
print 'Minimum kinetic energy (at grid points): ' + str(kin_energy_matrix.min())
print 'Maximum kinetic energy (at grid points): ' + str(kin_energy_matrix.max())
fig, ax = plt.subplots(1, 1, figsize=(9, 9))
CS = ax.contour(kin_x, kin_y, kin_energy_matrix)
plt.clabel(CS, inline=1, fmt='%1.1f', fontsize=10)
ax.set_title('Kinetic energy surface')
plt.show()
fig, axs = plt.subplots(m.n_hmc_steps + 1, 3, figsize=(18, (m.n_hmc_steps + 1) * 6))
colors = cm.jet(np.linspace(0, 1, 10))
#contour_levels = (198, 200, 202, 204, 206, 208, 210)
#contour_levels = (130, 140, 150, 160, 180, 200, 240, 280)
#contour_levels = (100, 102, 104, 106, 108, 110, 115, 120, 125, 130)
#contour_levels = (400, 402, 404, 406, 408, 410, 412, 416, 420)
#contour_levels = (106, 108, 110, 112, 114, 116, 118, 120, 124, 128)
contour_levels = (160, 165, 170, 175, 180, 185, 190, 195, 200, 210, 220, 230, 240, 250, 270, 300)
#contour_levels = (174, 175, 176, 177, 178, 180, 182, 184, 186, 190, 200)
#contour_levels = (59, 61, 63, 65, 67, 69, 71, 73, 75, 80, 85, 90)
vel_contour_levels = np.linspace(2.0, 70.0, 18)
#CS0 = axs[0, 0].contourf(x, y, pot_energy_matrix, np.linspace(155, 240, 500))
def colour_for_z_samples(samples):
    # Colour each sample by its quadrant relative to the sample mean; samples
    # numerically at the mean get their own colour index.
    mean = samples.mean(axis=0)
    mean1 = mean[dim1]
    mean2 = mean[dim2]
    colour = np.zeros_like(samples[:, 0])
    colour[np.logical_and(samples[:, dim1] < mean1, samples[:, dim2] < mean2)] = 0
    colour[np.logical_and(samples[:, dim1] < mean1, samples[:, dim2] >= mean2)] = 2
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] < mean2)] = 4
    colour[np.logical_and(samples[:, dim1] >= mean1, samples[:, dim2] >= mean2)] = 7
    colour[((samples[:, dim1] - mean1) ** 2 + (samples[:, dim2] - mean2) ** 2) < 1e-5] = 9
    return colour.astype('int32')
colour = colour_for_z_samples(z_samples[m.n_hmc_steps, :, :])
# Per-quadrant mean and variance of the final velocities.
print v_samples[m.n_hmc_steps, colour == 0, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 2, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 4, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 7, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 9, :].mean(axis=0)
print v_samples[m.n_hmc_steps, colour == 0, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 2, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 4, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 7, :].var(axis=0)
print v_samples[m.n_hmc_steps, colour == 9, :].var(axis=0)
for i in range(m.n_hmc_steps + 1):
    CS = axs[i, 0].contour(x, y, pot_energy_matrix, contour_levels)
    plt.clabel(CS, inline=1, fmt='%1.0f', fontsize=10)
    axs[i, 0].scatter(z_samples[i, :, dim1], z_samples[i, :, dim2],
                      c=colors[colour_for_z_samples(z_samples[i, :, :])], s=20, alpha=.3, lw=0)
    CS_vel = axs[i, 1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
    plt.clabel(CS_vel, inline=1, fmt='%1.1f', fontsize=10)
    axs[i, 1].scatter(v_samples[i, :, dim1], v_samples[i, :, dim2],
                      c=colors[colour_for_z_samples(z_samples[i, :, :])], s=20, alpha=.3, lw=0)
    pot_energy_distrib = f_pot_en(repeated_X, cast_array_to_local_type(z_samples[i, :, :]))
    if i == 0:
        max_x_value_for_hist = pot_energy_distrib.max() + 5
        min_x_value_for_hist = np.floor(pot_energy_matrix.min()) - 5
    pot_energy_distrib_mean = pot_energy_distrib.mean()
    axs[i, 2].hist(pot_energy_distrib, 30, normed=1,
                   range=(min_x_value_for_hist, max_x_value_for_hist))
    axs[i, 2].autoscale(enable=False, axis='both')
    axs[i, 2].axvline(pot_energy_distrib_mean, color='r', linestyle='dashed', linewidth=2)
    axs[i, 2].set_xlim(min_x_value_for_hist, max_x_value_for_hist)
    axs[i, 2].text(pot_energy_distrib_mean + 1.0, 0.8 * axs[i, 2].get_ylim()[1],
                   'Mean: ' + str(pot_energy_distrib_mean))
    axs[i, 1].set_xlim(-velocity_range_for_images[dim1], velocity_range_for_images[dim1])
    axs[i, 1].set_ylim(-velocity_range_for_images[dim2], velocity_range_for_images[dim2])
axs[0, 0].scatter(f_z_init_mean(single_X)[0, dim1], f_z_init_mean(single_X)[0, dim2], c='black', s=20)
plt.show()
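# Trace a single HMC trajectory (positions and velocities at every leapfrog
# step) for one data point, with a fixed noise seed so the path is
# reproducible.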
np.random.seed(1)
velocity_noise = cast_array_to_local_type(np.random.normal(size=(m.n_hmc_steps, 1, m.n_latent)))
#velocity_noise = np.zeros_like(velocity_noise)
init_pos = f_z_init_mean(single_X) + np.array([0.0, 0.1])
init_vel = f_kin_energy_sample_from_noise(single_X, velocity_noise[0])
num_vels_per_hmc = m.n_lf_steps + 2  # refreshed velocity + half-step velocities + final velocity
position_array = np.zeros((m.n_hmc_steps * m.n_lf_steps + 1, m.n_latent))
position_array[0] = init_pos
velocity_array = np.zeros((m.n_hmc_steps * num_vels_per_hmc, m.n_latent))
velocity_array[0] = ma.assert_numpy(init_vel)
for hmc_num in range(m.n_hmc_steps):
    if hmc_num == 0:
        curr_pos = cast_array_to_local_type(init_pos)
        curr_vel = init_vel
    else:
        curr_vel = f_updated_vel_from_noise(single_X, curr_vel, velocity_noise[hmc_num])
    velocity_array[hmc_num * (m.n_lf_steps + 2)] = ma.assert_numpy(curr_vel)
    lf_step_results = f_perform_lf_steps(single_X, curr_pos, curr_vel)
    pos_steps = lf_step_results[:m.n_lf_steps]
    vel_half_steps_and_final = lf_step_results[m.n_lf_steps:]
    final_vel = lf_step_results[-1]
    final_pos = pos_steps[-1]
    position_array[hmc_num * m.n_lf_steps + 1:(hmc_num + 1) * m.n_lf_steps + 1] = \
        ma.assert_numpy(pos_steps[:, 0, :])
    velocity_array[hmc_num * num_vels_per_hmc + 1:(hmc_num + 1) * num_vels_per_hmc] = \
        ma.assert_numpy(vel_half_steps_and_final[:, 0, :])
    curr_pos = final_pos
    curr_vel = final_vel
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
step_color = cm.jet(np.linspace(0, 1, position_array.shape[0]))
CS = axs[0].contour(x, y, pot_energy_matrix, contour_levels)
CS_vel = axs[1].contour(kin_x, kin_y, kin_energy_matrix, vel_contour_levels)
hmc_step_indices = np.arange(0, position_array.shape[0], m.n_lf_steps)
size_array = 40*np.ones((position_array.shape[0],))
size_array[hmc_step_indices] = 100
axs[0].scatter(position_array[:, dim1], position_array[:, dim2], c=step_color, lw=1, s=size_array)
axs[1].set_color_cycle(step_color)
for hmc_num in range(m.n_hmc_steps):
    curr_vel_range = np.arange(num_vels_per_hmc * hmc_num, num_vels_per_hmc * (hmc_num + 1) - 2)
    init_vel_ind = hmc_num * num_vels_per_hmc
    final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
    curr_index = hmc_step_indices[hmc_num]
    next_index = hmc_step_indices[hmc_num + 1]
    for j in curr_vel_range:
        axs[1].plot(velocity_array[j:j + 2, dim1], velocity_array[j:j + 2, dim2], lw=2)
    axs[1].scatter(velocity_array[init_vel_ind, dim1], velocity_array[init_vel_ind, dim2],
                   c=step_color[curr_index], lw=0, s=100)
    axs[1].scatter(velocity_array[final_vel_ind, dim1], velocity_array[final_vel_ind, dim2],
                   c=step_color[next_index], lw=0, s=100)
for hmc_num in range(m.n_hmc_steps):
    final_vel_ind = (hmc_num + 1) * num_vels_per_hmc - 1
    next_index = hmc_step_indices[hmc_num + 1]
    axs[1].plot(velocity_array[final_vel_ind - 1:final_vel_ind + 1, dim1],
                velocity_array[final_vel_ind - 1:final_vel_ind + 1, dim2],
                lw=2, c=step_color[next_index])
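# Probe the learned inverse (velocity) model: vary the final position z along
# one latent dimension at a time and record the predicted velocity mean and
# variance.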
variation_start = z_sample_final_mean - 2*z_sample_final_std
variation_end = z_sample_final_mean + 2*z_sample_final_std
final_vel_model_mean_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
final_vel_model_var_output = np.zeros((m.n_latent, num_repeats, m.n_latent))
for variation_dim in range(m.n_latent):
    z_variation = np.linspace(variation_start[variation_dim], variation_end[variation_dim], num_repeats)
    sample_array = np.tile(z_sample_final_mean, (num_repeats, 1))
    sample_array[:, variation_dim] = z_variation
    final_vel_model_mean_output[variation_dim] = f_v_final_mean(repeated_X, cast_array_to_local_type(sample_array))
    final_vel_model_var_output[variation_dim] = f_v_final_var(repeated_X, cast_array_to_local_type(sample_array))
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(final_vel_model_mean_output[:, :, dim1],
               final_vel_model_mean_output[:, :, dim2],
               c=np.transpose(np.tile(np.linspace(0, m.n_latent - 1, m.n_latent), (num_repeats, 1))),
               lw=0, s=5)
axs[1].scatter(final_vel_model_var_output[:, :, dim1],
               final_vel_model_var_output[:, :, dim2],
               c=np.transpose(np.tile(np.linspace(0, m.n_latent - 1, m.n_latent), (num_repeats, 1))),
               lw=0, s=5)
plt.show()
final_z_samples = cast_array_to_local_type(z_samples[m.n_hmc_steps, :, :])
final_v_samples = cast_array_to_local_type(v_samples[m.n_hmc_steps, :, :])
final_vel_mean = f_v_final_mean(repeated_X, final_z_samples)
final_vel_var = f_v_final_var(repeated_X, final_z_samples)
final_vel_nll = f_v_final_model_nll(repeated_X, final_z_samples, final_v_samples)
print f_kin_energy_nll(single_X).sum(-1)
print final_vel_nll.mean()
print final_vel_nll.min()
print final_vel_nll.max()
fig, axs = plt.subplots(4, 2, figsize=(18, 36))
# TODO: Analysis of how final_vel_mean and final_vel_var depend on z (since they all share the same x)
# Statistics of the final-step samples (the hard-coded index 3 assumes
# n_hmc_steps == 3).
print z_samples[3, :, :].mean(axis=0)
print z_samples[3, :, :].var(axis=0)
print v_samples[3, :, :].mean(axis=0)
print v_samples[3, :, :].var(axis=0)
print f_v_init_var(np.array([X[X_index, :]]))
print final_vel_nll.mean()
plt.boxplot(final_vel_nll, whis=1)
plt.show()
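# Per-digit cluster statistics in latent space, before (initial recognition
# model) and after the HMC refinement.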
centers = np.zeros((10, n_latents))
stddevs = np.zeros((10, n_latents))
centers_init = np.zeros((10, n_latents))
stddevs_init = np.zeros((10, n_latents))
for i in range(10):
    Li = f_z_sample(X[Z.argmax(1) == i])
    centers[i] = Li.mean(axis=0)
    stddevs[i] = np.std(Li, axis=0)
    Li_init = f_z_init_sample(X[Z.argmax(1) == i])
    centers_init[i] = Li_init.mean(axis=0)
    stddevs_init[i] = np.std(Li_init, axis=0)
fig, axs = plt.subplots(1, 2, figsize=(18, 9))
axs[0].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[0].scatter(centers_init[:, dim1], centers_init[:, dim2], c=range(10), s=50, marker=u's')
axs[1].scatter(centers[:, dim1], centers[:, dim2], c=range(10), s=50)
axs[1].scatter(centers[:, dim1] + stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'>')
axs[1].scatter(centers[:, dim1] - stddevs[:, dim1], centers[:, dim2], c=range(10), s=50, marker=u'<')
axs[1].scatter(centers[:, dim1], centers[:, dim2] + stddevs[:, dim2], c=range(10), s=50, marker=u'^')
axs[1].scatter(centers[:, dim1], centers[:, dim2] - stddevs[:, dim2], c=range(10), s=50, marker=u'v')
#axs[0].set_xlim(-1.2, 1.2)
#axs[0].set_ylim(-1.2, 1.2)
#axs[1].set_xlim(-1.2, 1.2)
#axs[1].set_ylim(-1.2, 1.2)
# Shift of the per-digit centers and spreads from the initial recognition
# model to the post-HMC samples.
print (centers[:, dim1] - centers_init[:, dim1])
print (centers[:, dim2] - centers_init[:, dim2])
print (stddevs[:, dim1] - stddevs_init[:, dim1])
print (stddevs[:, dim2] - stddevs_init[:, dim2])